clear all
set more off
capture log close

*to run these files, you will need to replace the following path with the corresponding path from your own computer

global mainpath "~/Dropbox/Maintenance/cleaning_experiment_full_paper/Replication_Folder"
cd $mainpath

* paths
global path_graphs "graphs"
global path_stata_graphs "stata_graphs"
global path_supplementary_graphs "supplementary_graphs"
global path_tables "tables"
global path_data "data"

set scheme s1mono

***********************
***** analysis *****
***********************

use $path_data/data_tw_anonymized_jan_2023, clear

* water available
bys project: tab water_available
gen water_available_d = (water_available == "yes") if water_available != ""

*gen pooled training treatment dummy

gen training_treatment_pooled = training_treatment_1 + training_treatment_2
label var  training_treatment_pooled "Training, any"

*keep only experimental data 

keep if data21_water_available == "yes"

*Merge with 2021 data for randomization checks

merge 1:1 swoid using $path_data/data_tw_anonymized_2021, keepusing(*first*) keep(match)

rename *first *2021 

*balance check using 2021 contamination 

gen ecoli_present_2021 = (ecoli_mpn_2021 > 0) if ecoli_mpn_2021 != .
gen tot_coli_present_2021 = (tot_coli_mpn_2021 > 0) if tot_coli_mpn_2021 != .

gen asinh_ecoli_2021 = asinh(ecoli_mpn_2021)
gen asinh_tot_coli_2021 = asinh(tot_coli_mpn_2021)


local i = 1 

foreach v in ecoli_present_2021 tot_coli_present_2021 asinh_ecoli_2021 asinh_tot_coli_2021 {
	
	reg `v' training_treatment_1 training_treatment_2, vce(hc3)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]

	estimates store e_rand_checks_`i'
	local i = `i' + 1

	reg `v' training_treatment_pooled, vce(hc3)
	
	estadd scalar control_mean = _b[_cons]

	estimates store e_rand_checks_`i'
	local i = `i' + 1

}

*Make Supplementary Table 6

esttab e_rand_checks_* using "$path_tables/rand_checks.tex", replace booktabs keep(training_treatment*) cell(b(star fmt(%04.2f)) se(par fmt(%04.2f))) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 	nomtitle collabels(none) stats(p_1vs2 control_mean, layout(@) fmt(%05.3f %04.2f) labels("T vs T \$+\$ S: \$ p \$ value" "Control mean")) mgroups("\emph{E. coli} present" "TC present""\emph{E. coli} (MPN)" "TC (MPN)", pattern(1 0 1 0 1 0 1 0) span prefix(\multicolumn{@span}{c}{) suffix(}))

*check for effects on water availability

reg water_available_d training_treatment_1 training_treatment_2, vce(hc3)

test training_treatment_1 = training_treatment_2
estadd scalar p_1vs2 = r(p)
estadd scalar control_mean = _b[_cons]

estimates store e_function_1

reg water_available_d training_treatment_pooled, vce(hc3)
estadd scalar control_mean = _b[_cons]

estimates store e_function_2

*Make Supplementary Table 2

esttab e_function_* using "$path_tables/function_r1.tex", replace booktabs keep(training_treatment*) cell(b(star fmt(%04.2f)) se(par fmt(%04.2f))) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 	nomtitle collabels(none) stats(p_1vs2 control_mean, layout(@) fmt(%05.3f %04.2f) labels("T vs T \$+\$ S: \$ p \$ value" "Control mean"))  mgroups("Functioning", pattern(1 0) span prefix(\multicolumn{@span}{c}{) suffix(}))

*estimate treatment effects on water quality

gen ecoli_present = (ecoli_mpn > 0) if ecoli_mpn != .
gen tot_coli_present = (tot_coli_mpn > 0) if tot_coli_mpn != .

gen asinh_ecoli = asinh(ecoli_mpn)
gen asinh_tot_coli = asinh(tot_coli_mpn)

*run regressions

local i = 1

foreach v in ecoli_present tot_coli_present asinh_ecoli asinh_tot_coli {
	
	reg `v' training_treatment_1 training_treatment_2, vce(hc3) level(90)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]


	estimates store e_main_`i'
	local i = `i' + 1

	reg `v' training_treatment_pooled, vce(hc3) level(90)
	
	estadd scalar control_mean = _b[_cons]

	estimates store e_main_`i'
	local i = `i' + 1

}

*Make Supplementary Table S8

esttab e_main_* using "$path_tables/main_results.tex", replace booktabs keep(training_treatment*) cell(b(star fmt(%04.2f)) se(par fmt(%04.2f))) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 	nomtitle collabels(none) stats(p_1vs2 control_mean, layout(@) fmt(%05.3f %04.2f) labels("T vs T \$+\$ S: \$ p \$ value" "Control mean")) mgroups("\emph{E. coli} present" "TC present""\emph{E. coli} (MPN)" "TC (MPN)", pattern(1 0 1 0 1 0 1 0) span prefix(\multicolumn{@span}{c}{) suffix(}))


*save results in macros for graphs: means and confidence intervals in matrices, p values in locals

local i = 1

foreach v in ecoli_present tot_coli_present asinh_ecoli asinh_tot_coli {
	
	mat `v'_g = J(4,4,.)
	
	estimates restore e_main_`i'
	
	mat `v'_g[1,1] = 1
	mat `v'_g[1,2] = _b[_cons]
	mat `v'_g[1,3] = _b[_cons] - 1.645 * _se[_cons]
	mat `v'_g[1,4] = _b[_cons] + 1.645 * _se[_cons]
	
	forvalues j = 1(1)2 {
 	
		lincom _cons + training_treatment_`j', level(90)
		
		local k = `j' + 1
	
		mat `v'_g[`k',1] = `k' + 1
		mat `v'_g[`k',2] = r(estimate)
		mat `v'_g[`k',3] = r(lb)
		mat `v'_g[`k',4] = r(ub)
		
		test training_treatment_`j' 
		local `v'_p_t`j' = r(p)

	}
	
	test training_treatment_1 = training_treatment_2 
	local `v'_p_t1vst2 = r(p)
	
	local i = `i' + 1
	estimates restore e_main_`i'
	
	lincom _cons + training_treatment_pooled, level(90)
		
	local k = 4
	
	mat `v'_g[`k',1] = 6
	mat `v'_g[`k',2] = r(estimate)
	mat `v'_g[`k',3] = r(lb)
	mat `v'_g[`k',4] = r(ub)
	
	test training_treatment_pooled
	local `v'_p_t = r(p)
		
	local i = `i' + 1

}

*plot results sub-graphs 

foreach v in ecoli_present tot_coli_present asinh_ecoli asinh_tot_coli {
	
	if "`v'" == "ecoli_present" {
		
		local h1 0.35
		local h1_l 0.34
		local h1_u 0.37
		
		local h2 0.4
		local h2_l 0.39
		local h2_u 0.42
		
		local h3 0.45
		local h3_l 0.44
		local h3_u 0.47

		local ys "-0.03 0.49"
		local yl "0(0.1)0.3"
		
		local yvar "Share of wells"
		local subtitle "a) Presence of {it:E. coli}, 13 months"	
	}
	
	if "`v'" == "tot_coli_present" {
		
		local h1 1.0
		local h1_l 0.98
		local h1_u 1.03
		
		local h2 1.1
		local h2_l 1.08
		local h2_u 1.13

		local h3 1.2
		local h3_l 1.18
		local h3_u 1.23
		
		local ys "0 1.25"
		local yl "0(0.2)0.8"
		
		local yvar "Share of wells"
		local subtitle "c) Presence of coliform bacteria, 13 months"
	
	}
	
	
	if "`v'" == "asinh_ecoli" {
		
		local h1 1.65
		local h1_l 1.62
		local h1_u 1.69
		
		local h2 1.75
		local h2_l 1.72
		local h2_u 1.79

		local h3 1.85
		local h3_l 1.82
		local h3_u 1.89
		
		local ys "-0.15 0.85"
		local yl `" 0 "0" 0.881 "1" 1.444 "2" 1.818 "3" "'
		
		local yvar "E. coli (MPN)"
		local subtitle "a) Concentration of {it:E. coli}, 13 months"

	
	}
	
	if "`v'" == "asinh_tot_coli" {
		
		local h1 4.6
		local h1_l 4.52
		local h1_u 4.75
		
		local h2 4.9
		local h2_l 4.82
		local h2_u 5.05

		local h3 5.2
		local h3_l 5.12
		local h3_u 5.35
		
		local ys "0 5.4"
		local yl `" 0 "0"  0.881 "1" 1.444 "2" 2.095 "4" 2.776 "8" 3.467 "16" 4.159 "32" 4.852 "64" "'
		
		local yvar "Total coliforms (MPN)"
		local subtitle "c) Concentration of total coliform bacteria,  13 months"

	
	}
	
	local b1 1 
	local b2_l 2.9
	local b2_r 3.1
	local b3 4
	local b4 6
	
	local b_p1 = 0.5 * (`b1' + `b2_l')
	local b_p2 = 0.5 * (`b2_r' + `b3')
	local b_p3 = 0.5 * (`b1' + `b3')
	local b_p4 = 0.5 * (`b1' + `b4')
	
	local brackets "(scatteri `h1' `b1' `h1' `b2_l',  recast(line) lw(medthin)  mc(none) lc(black) lp(solid)) (scatteri `h1' `b1' `h1' `b2_l',  recast(dropline) base(`h1_l') lw(medthin) mc(none) lc(black) lp(solid)) (scatteri `h1' `b2_r' `h1' `b3',  recast(line) lw(medthin)  mc(none) lc(black) lp(solid)) (scatteri `h1' `b2_r' `h1' `b3',  recast(dropline) base(`h1_l') lw(medthin) mc(none) lc(black) lp(solid)) (scatteri `h2' `b1' `h2' `b3',  recast(line) lw(medthin)  mc(none) lc(black) lp(solid))   (scatteri `h2' `b1' `h2' `b3',  recast(dropline) base(`h2_l') lw(medthin) mc(none) lc(black) lp(solid)) (scatteri `h3' `b1' `h3' `b4', recast(line) lw(medthin)  mc(none) lc(black) lp(solid))   (scatteri `h3' `b1' `h3' `b4', recast(dropline) base(`h3_l') lw(medthin) mc(none) lc(black) lp(solid)) "
	
	foreach p in `v'_p_t1 `v'_p_t2 `v'_p_t1vst2 `v'_p_t {
	
		if ``p'' < 0.001 {
			local `p'_f = "p < 0.001***"
		} 
		
		else if ``p'' < 0.01 {
			local `p'_f = "p = " + strofreal(``p'',"%05.3f") + "***"
		} 

		else if ``p'' < 0.05 {
			local `p'_f = "p = " + strofreal(``p'',"%05.3f") + "**"
		} 

		else if ``p'' < 0.1 {
			local `p'_f = "p = " + strofreal(``p'',"%05.3f") + "*"
		} 

		else local `p'_f = "p = " + strofreal(``p'',"%05.3f")	
	
	}
	
	local pvals "text(`h1_u' `b_p1' `"``v'_p_t1_f'"', placement(c) justification(center) size(small)) text(`h1_u' `b_p2' `"``v'_p_t1vst2_f'"', placement(c) justification(center) size(small)) text(`h2_u' `b_p3' `"``v'_p_t2_f'"', placement(c) justification(center) size(small)) text(`h3_u' `b_p4' `"``v'_p_t_f'"', placement(c) justification(center) size(small))  "
	
	
	capture drop `v'_g*
	svmat `v'_g
	
	twoway (bar `v'_g2 `v'_g1 if `v'_g1 == 1, bcolor(ebblue*0.75) barwidth(0.5)) (bar `v'_g2 `v'_g1 if `v'_g1 == 3, bcolor(orange*0.5) barwidth(0.5)) (bar `v'_g2 `v'_g1 if `v'_g1 == 4, bcolor(orange*0.75) barwidth(0.5)) (bar `v'_g2 `v'_g1 if `v'_g1 == 6, bcolor(orange*1.25) barwidth(0.5)) (rcap `v'_g3 `v'_g4 `v'_g1) `brackets', `pvals' legend(off) xlabel(1 "Control" 3 "Training" 4 `" "Training" "+ supplies" "' 6 `" "Training," "pooled" "', labsize(small) tlength(0))  subtitle("`subtitle'", size(small) position(11) span) xtitle("") ytitle("`yvar'", size(small)) xsize(6) ysize(8) yscale(range(`ys') lstyle(none)) ylabel(`yl', labsize(small) ) yline(0) xscale(range(0.5 6.5) lstyle(none) ) plotregion(style(none)) name(`v'_results, replace)

	graph save $path_stata_graphs/`v'_results, replace

}

*users 

gen asinh_users_N_reported = asinh(users_num_reported)
gen asinh_users_N_observed = asinh(users_num_observed)
	
*run regressions

local i = 1

foreach v in asinh_users_N_reported asinh_users_N_observed {
	
	reg `v' training_treatment_1 training_treatment_2, vce(hc3) level(90)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]


	estimates store e_users_`i'
	local i = `i' + 1

	reg `v' training_treatment_pooled, vce(hc3) level(90)
	
	estadd scalar control_mean = _b[_cons]

	estimates store e_users_`i'
	local i = `i' + 1

}
	
*Make Supplementary Table 4
	
esttab e_users_* using "$path_tables/users.tex", replace booktabs keep(training_treatment*) cell(b(star fmt(%04.2f)) se(par fmt(%04.2f))) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 	nomtitle collabels(none) stats(p_1vs2 control_mean, layout(@) fmt(%05.3f %04.2f) labels("T vs T \$+\$ S: \$ p \$ value" "Control mean")) mgroups("Reported" "Observed" , pattern(1 0 1 0) span prefix(\multicolumn{@span}{c}{) suffix(}))

*persistent effects of cleaning treatments 

*Merge with 2021 data for randomization checks

capture drop _merge

merge 1:1 swoid using $path_data/data_tw_anonymized_2021, keepusing(cleaning_treatment_1 cleaning_treatment_2 cleaning_treatment_3) keep(match)

local i = 1 

foreach v in ecoli_present tot_coli_present asinh_ecoli asinh_tot_coli {
	
	reg `v' cleaning_treatment_1 cleaning_treatment_2 cleaning_treatment_3, vce(hc3)

	estimates store e_persistence_`i'
	local i = `i' + 1

}

esttab e_persistence_* using "$path_tables/persistence.tex", replace booktabs keep(cleaning_treatment*) cell(b(star fmt(%04.2f)) se(par fmt(%04.2f))) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 	nomtitle collabels(none) mgroups("\emph{E. coli} present" "TC present""\emph{E. coli} (MPN)" "TC (MPN)", pattern(1 1 1 1) span prefix(\multicolumn{@span}{c}{) suffix(}))
	
*analyze caretaker data

use $path_data/data_caretakers_anonymized_jan_2023, clear

*keep only functioning wells 

keep if water_available == "yes"

*drop two wells (one in each training treatment arm) where caretaker data lost

drop if _merge == 2

*count obs

unique tw if training_treatment == "control"
*57 caretaker surveys from 29 wells; one well with only one caretaker survey

unique tw if training_treatment == "training_and_supplies"
*56 caretaker surveys from 28 wells

unique tw if training_treatment == "training_only"
*58 caretaker surveys from 30 wells; two wells with only one caretaker survey

*NB: because of pull data errors (a coding error in the form pulled status by community, not by well, sometimes resulting in mismatches), we wrongly asked some questions of control caretakers and wrongly didn't ask some of training assigned caretakers.  

*In total it seems that we misassigned 12 control caretakers, 2 training and supply caretakers and 2 training_only caretakers in this way

*compare attendance at training by records and self-reports

tab training_treatment training_attendance, row

*binarize 

gen training_attendance_d = regexm(training_attendance,"yes") if training_attendance != ""

*no control household that was accidentally asked this question reported attending training; 98% of caretakers report attending training if assigned to training and supplies (and asked the question), 82% report attending training if assigned to training only

gen attendance_records =  tr_list_caretaker_1_attendance if respondent == "caretaker_1"
replace attendance_records = tr_list_caretaker_2_attendance if respondent == "caretaker_2"

*binarize 
	
gen attendance_records_d = regexm(attendance_records,"yes") if attendance_records != ""
tab training_treatment attendance_records_d, row

*our records are very consistent: we recorded 93% attending under training and supplies and 83% attending under training only

*survey responses and project records internally consistent: 100/104 gave same response.  3 said they attended when our records said did not (all training and supplies).  1 did not know when our records said they did (training only)

tab attendance_records_d training_attendance
bys training_treatment: tab attendance_records_d training_attendance

*weight by no. of caretaker surveys/well

bys tw: egen n_caretaker_surveys = count(respondent)	
gen weight = 1/n_caretaker_surveys

*run regressions comparing self-reported attendance and attendance by project record 

local i = 1

foreach v in attendance_records_d training_attendance_d {
	
	reg `v' training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw) level(90) nocons

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)

	estimates store e_attend_`i'
	
	local i = `i' + 1

}

*save results in matrix 

mat attend_g = J(4,4,.)

forvalues i = 1(1)2 {

	estimates restore e_attend_`i'
	

	forvalues j = 1(1)2 {
	
		local k =  (`i'-1)*2 +`j'
		
		mat attend_g[`k',1] = `k' + `i' - 1
		mat attend_g[`k',2] = _b[training_treatment_`j']
		mat attend_g[`k',3] = _b[training_treatment_`j'] - 1.645 * _se[training_treatment_`j']
		mat attend_g[`k',4] = _b[training_treatment_`j'] + 1.645 * _se[training_treatment_`j']	
	}
	
	test training_treatment_1 = training_treatment_2
	local p = r(p)
	
	if `p' < 0.001 {
		local attend_`i'_p_t1vst2 = "p < 0.001***"
	} 
	
	if `p' < 0.01 {
		local attend_`i'_p_t1vst2 = "p = " + strofreal(`p',"%05.3f") + "***"
	} 

	else if `p' < 0.05 {
		local attend_`i'_p_t1vst2 = "p = " + strofreal(`p',"%05.3f") + "**"
	} 

	else if `p' < 0.1 {
		local attend_`i'_p_t1vst2 = "p = " + strofreal(`p',"%05.3f") + "*"
	} 

	else local attend_`i'_p_t1vst2 = "p = " + strofreal(`p',"%05.3f")		

}

*Make Supplementary Figure 5

capture drop attend_g*
svmat attend_g

twoway (bar attend_g2 attend_g1 if attend_g1 == 1, bcolor(orange*0.5) barwidth(0.5)) (bar attend_g2 attend_g1 if attend_g1 == 2, bcolor(orange*0.75) barwidth(0.5)) (bar attend_g2 attend_g1 if attend_g1 == 4, bcolor(orange*0.5) barwidth(0.5)) (bar attend_g2 attend_g1 if attend_g1 == 5, bcolor(orange*0.75) barwidth(0.5)) (rcap attend_g3 attend_g4 attend_g1) (scatteri 1.05 1 1.05 2,  recast(line) lw(medthin)  mc(none) lc(black) lp(solid)) (scatteri 1.05 1 1.05 2,  recast(dropline) base(1.025) lw(medthin) mc(none) lc(black) lp(solid)) (scatteri 1.05 4 1.05 5,  recast(line) lw(medthin)  mc(none) lc(black) lp(solid)) (scatteri 1.05 4 1.05 5,  recast(dropline) base(1.025) lw(medthin) mc(none) lc(black) lp(solid)), text(1.075 1.5 "`attend_1_p_t1vst2'", placement(c) justification(center) size(small))  text(1.075 4.5 "`attend_2_p_t1vst2'", placement(c) justification(center) size(small)) legend(off) xlabel(1 "Training" 1.5 `" " " " " " " "Project records" "' 2 `" "Training" "+ supplies" "' 4 "Training" 4.5 `" " " " " " " "Self-reported data" "' 5 `" "Training" "+ supplies" "', labsize(small) tlength(0)) xtitle("")  ytitle("Share of caretakers attending training", size(small)) xsize(6) ysize(8) yscale(range(0 1.1) lstyle(none)) ylabel(0(0.2)1, labsize(small) ) yline(0) xscale(range(0.5 5.5) lstyle(none)) plotregion(style(none)) name(attend_results, replace)

graph export $path_supplementary_graphs/attend_results.png, replace

*regressions

local i = 1

foreach v in  knowledge_brush knowledge_scrub knowledge_wash {
	
	reg `v' training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw) level(90)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]

	estimates store e_knowledge_`i'
	local i = `i' + 1

}


*save results in macros for graphs: means and confidence intervals in matrices, p values in locals

mat knowledge_g = J(9,4,.)

local i = 1

foreach v in knowledge_wash knowledge_scrub knowledge_brush {
	
	estimates restore e_knowledge_`i'
	
	mat knowledge_g[3*`i'-2,1] = 4*`i'-3
	
	mat knowledge_g[3*`i'-2,2] = _b[_cons]
	mat knowledge_g[3*`i'-2,3] = _b[_cons] - 1.645 * _se[_cons]
	mat knowledge_g[3*`i'-2,4] = _b[_cons] + 1.645 * _se[_cons]
	
	forvalues j = 1(1)2 {
 	
		lincom _cons + training_treatment_`j', level(90)
			
		mat knowledge_g[3*`i'-2+`j',1] = 4*`i' - 3 + `j'
		mat knowledge_g[3*`i'-2+`j',2] = r(estimate)
		mat knowledge_g[3*`i'-2+`j',3] = r(lb)
		mat knowledge_g[3*`i'-2+`j',4] = r(ub)
		
		test training_treatment_`j' 
		local `v'_p_t`j' = r(p)

	}
	
	test training_treatment_1 = training_treatment_2 
	local `v'_p_t1vst2 = r(p)
		
	local i = `i' + 1

}	
	
capture drop knowledge_g*
svmat knowledge_g

*Plot subgraph

twoway (bar knowledge_g2 knowledge_g1 if knowledge_g1 == 1 | knowledge_g1 == 5 | knowledge_g1 == 9, bcolor(ebblue*0.75) barwidth(0.5)) (bar knowledge_g2 knowledge_g1 if knowledge_g1 == 2 | knowledge_g1 == 6 | knowledge_g1 == 10, bcolor(orange*0.5) barwidth(0.5))  (bar knowledge_g2 knowledge_g1 if knowledge_g1 == 3 | knowledge_g1 == 7 | knowledge_g1 == 11, bcolor(orange*0.75) barwidth(0.5)) (rcap knowledge_g3 knowledge_g4 knowledge_g1), legend(off) xmlabel(1 "C" 2 "T"  3 "T + S" 5 "C" 6 "T"  7 "T + S" 9 "C" 10 "T"  11 "T + S", labsize(small) tlength(0))  xlabel(2 `" "Clean accessible parts" "with a stiff brush" "' 6 `""Scrub accessible parts" "with chlorine solution" "'  10 `""Wash interior with" "chlorine solution" "', labsize(vsmall) tlength(0) labgap(40pt)) subtitle("a) Caretaker knowledge, 13 months", size(small) position(11) span)  xtitle("")  ytitle("Share of caretakers listing as best practice", size(small))  xsize(8) ysize(12) yscale(range(0 1) lstyle(none)) ylabel(0(0.2)1, labsize(small) ) yline(0) xscale(range(0.5 11.5) lstyle(none)) plotregion(style(none)) name(knowledge_results, replace)

graph save $path_stata_graphs/knowledge_comparisons_round1, replace

*practice: share regularly practicing each of the three steps

*binarize variables (all who knew of practice reported practicing at least rarely, so focus on whether they reported doing it regularly)

foreach v in freq_brush freq_scrub freq_wash {	
	gen `v'_reg = (`v' == "regularly") 
}

gen n_reg = freq_brush_reg + freq_scrub_reg + freq_wash_reg 


*regressions 
 
local i = 1

foreach v in freq_brush_reg freq_scrub_reg freq_wash_reg {
	
	reg `v' training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw) level(90)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]

	estimates store e_practice_`i'
	local i = `i' + 1

}

*save results in macros for graphs: means and confidence intervals in matrices, p values in locals

mat practice_g = J(9,4,.)

local i = 1

foreach v in freq_brush_reg freq_scrub_reg freq_wash_reg {
	
	estimates restore e_practice_`i'
	
	mat practice_g[3*`i'-2,1] = 4*`i'-3
	
	mat practice_g[3*`i'-2,2] = _b[_cons]
	mat practice_g[3*`i'-2,3] = _b[_cons] - 1.645 * _se[_cons]
	mat practice_g[3*`i'-2,4] = _b[_cons] + 1.645 * _se[_cons]
	
	forvalues j = 1(1)2 {
 	
		lincom _cons + training_treatment_`j', level(90)
			
		mat practice_g[3*`i'-2+`j',1] = 4*`i' - 3 + `j'
		mat practice_g[3*`i'-2+`j',2] = r(estimate)
		mat practice_g[3*`i'-2+`j',3] = r(lb)
		mat practice_g[3*`i'-2+`j',4] = r(ub)
		
		test training_treatment_`j' 
		local `v'_p_t`j' = r(p)

	}
	
	test training_treatment_1 = training_treatment_2 
	local `v'_p_t1vst2 = r(p)
		
	local i = `i' + 1

}	
 
*save results in macros for graphs: means and confidence intervals in matrices, p values in locals

mat practice_g = J(9,4,.)

local i = 1

foreach v in freq_brush_reg freq_scrub_reg freq_wash_reg {
	
	estimates restore e_practice_`i'
	
	mat practice_g[3*`i'-2,1] = 4*`i'-3
	
	mat practice_g[3*`i'-2,2] = _b[_cons]
	mat practice_g[3*`i'-2,3] = _b[_cons] - 1.645 * _se[_cons]
	mat practice_g[3*`i'-2,4] = _b[_cons] + 1.645 * _se[_cons]
	
	forvalues j = 1(1)2 {
 	
		lincom _cons + training_treatment_`j', level(90)
			
		mat practice_g[3*`i'-2+`j',1] = 4*`i' - 3 + `j'
		mat practice_g[3*`i'-2+`j',2] = r(estimate)
		mat practice_g[3*`i'-2+`j',3] = r(lb)
		mat practice_g[3*`i'-2+`j',4] = r(ub)
		
		test training_treatment_`j' 
		local `v'_p_t`j' = r(p)

	}
	
	test training_treatment_1 = training_treatment_2 
	local `v'_p_t1vst2 = r(p)
		
	local i = `i' + 1

}	

*Plot subgraph
	
capture drop practice_g*
svmat practice_g

twoway (bar practice_g2 practice_g1 if practice_g1 == 1 | practice_g1 == 5 | practice_g1 == 9, bcolor(ebblue*0.75) barwidth(0.5)) (bar practice_g2 practice_g1 if practice_g1 == 2 | practice_g1 == 6 | practice_g1 == 10, bcolor(orange*0.5) barwidth(0.5))  (bar practice_g2 practice_g1 if practice_g1 == 3 | practice_g1 == 7 | practice_g1 == 11, bcolor(orange*0.75) barwidth(0.5)) (rcap practice_g3 practice_g4 practice_g1), legend(off) xmlabel(1 "C" 2 "T"  3 "T + S" 5 "C" 6 "T"  7 "T + S" 9 "C" 10 "T"  11 "T + S", labsize(small) tlength(0))  xlabel(2 `" "Clean accessible parts" "with a stiff brush" "' 6 `""Scrub accessible parts" "with chlorine solution" "'  10 `""Wash interior with" "chlorine solution" "', labsize(vsmall) tlength(0)  labgap(40pt)) subtitle("c) Caretaker practice, 13 months", size(small) position(11) span) xtitle("")  ytitle("Share of caretakers regularly practicing", size(small))  xsize(8) ysize(12) yscale(range(0 1) lstyle(none)) ylabel(0(0.2)1, labsize(small) ) yline(0) xscale(range(0.5 11.5) lstyle(none)) plotregion(style(none)) name(practice_results, replace)

graph save $path_stata_graphs/practice_comparisons_round1, replace

*control, 98% of respondents report detaching the tubewell during cleaning; training only, 52% do so, training + supplies

tab training_treatment bleaching_detached , row 

*those who report knowing something is best practice but not doing it principally cite time as a reason 

foreach v in challenge_brush challenge_scrub challenge_wash {
	tab  `v' training_treatment_num, nolab
}

*most people report having the tools necessary to disassemble the tubewell

tab training_treatment tools_avail, row
gen tools_avail_d = (tools_avail == "yes")

reg tools_avail_d training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw)
 
*the majority say it is easy or very easy to obtain bleaching powder in the local market

tab training_treatment bleaching_available, row

*Of the households that we asked this question to (issues noted with pull data above) 
*almost no (N = 1) households in control purchased bleaching powder. No

tab training_treatment bleaching_purchased, row

* time taken 

*overall 97% report takes one hour

tab training_treatment cleaning_hours, row		
		
*support from community 

*most (91%) report receiving support either in labour (most commonly, 81%) or labour or cash (10%).  

tab cleaning_contrib

gen cleaning_contrib_d = (regexm(cleaning_contrib,"yes") == 1)
tab cleaning_contrib_d

*rates are slightly higher in treatment arms, insignificantly so

tab training_treatment cleaning_contrib_d, row

reg cleaning_contrib_d training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw)

*views on training

*everyone who attended says useful and that they understood training (but we only gave them a binary choice, so maybe not surprising)

tab training_useful training_attendance_d, missing
tab training_understand training_attendance_d, missing

*most (87%) say able to ask questions, somewhat more in training + supplies

tab training_treatment training_questions, row

*strengths of training

local i = 1

foreach v in training_strengths_supplied_powd training_strengths_supplied_guid training_strengths_info_precauti training_strengths_info_contamin training_strengths_demonstration {
	reg `v' training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw) level(90) nocons
	
	test training_treatment_1 = training_treatment_2
 

	estimates store e_strengths_`i'
	
	local i = `i' + 1

}

*Make Supplementary Figure 3

coefplot (e_strengths*, keep(training_treatment_1) label("Training") asequation swapnames msymbol(S) msize(small) mcolor(orange*0.5) ciopts(recast(rcap) lcolor(orange*0.5))) (e_strengths*, keep(training_treatment_2) label("Training and supplies") asequation swapnames msymbol(S) msize(small) mcolor(orange*0.75)  ciopts(recast(rcap) lcolor(orange*0.75))), sort(1) coeflabels(e_strengths_1 = "Supplied bleach powder" e_strengths_2 = "Information on correct procedure" e_strengths_3 = "Demonstration of correct procedure" e_strengths_4 = "Information on risks of inadequate cleaning" e_strengths_5 = "Information on bleach safety", labsize( small)) legend(rows(1) size(small)) msize(small) xtitle("Fraction of attendees listing strength", size( small)) xlabel(,labsize( small)) xscale(range(0 1)) xlabel(0(0.2)1) xsize(12) ysize(8) name(strengths, replace)

graph export $path_supplementary_graphs/strengths.png, replace

*note: none of the differences statistically significant apart from the supplies question

*the end


